{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Azure ML & Azure Databricks notebooks by Parashar Shah.\n",
        "\n",
        "Copyright (c) Microsoft Corporation. All rights reserved.\n",
        "\n",
        "Licensed under the MIT License."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Register Azure Databricks trained model and deploy it to ACI\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Please ensure you have run all previous notebooks in sequence before running this.\n",
        "\n",
        "Please Register Azure Container Instance(ACI) using Azure Portal: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-supported-services#portal in your subscription before using the SDK to deploy your ML model to ACI."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import azureml.core\n",
        "\n",
        "# Check core SDK version number\n",
        "print(\"SDK version:\", azureml.core.VERSION)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Set auth to be used by workspace related APIs.\n",
        "# For automation or CI/CD ServicePrincipalAuthentication can be used.\n",
        "# https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.authentication.serviceprincipalauthentication?view=azure-ml-py\n",
        "auth = None"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core import Workspace\n",
        "\n",
        "ws = Workspace.from_config(auth = auth)\n",
        "print('Workspace name: ' + ws.name, \n",
        "      'Azure region: ' + ws.location, \n",
        "      'Subscription id: ' + ws.subscription_id, \n",
        "      'Resource group: ' + ws.resource_group, sep = '\\n')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "##NOTE: service deployment always gets the model from the current working dir.\n",
        "import os\n",
        "\n",
        "model_name = \"AdultCensus_runHistory.mml\" # \n",
        "model_name_dbfs = os.path.join(\"/dbfs\", model_name)\n",
        "\n",
        "print(\"copy model from dbfs to local\")\n",
        "model_local = \"file:\" + os.getcwd() + \"/\" + model_name\n",
        "dbutils.fs.cp(model_name, model_local, True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#Register the model\n",
        "from azureml.core.model import Model\n",
        "mymodel = Model.register(model_path = model_name, # this points to a local file\n",
        "                       model_name = model_name, # this is the name the model is registered as, am using same name for both path and name.                 \n",
        "                       description = \"ADB trained model by Parashar\",\n",
        "                       workspace = ws)\n",
        "\n",
        "print(mymodel.name, mymodel.description, mymodel.version)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#%%writefile score_sparkml.py\n",
        "score_sparkml = \"\"\"\n",
        " \n",
        "import json\n",
        " \n",
        "def init():\n",
        "    # One-time initialization of PySpark and predictive model\n",
        "    import pyspark\n",
        "    import os\n",
        "    from azureml.core.model import Model\n",
        "    from pyspark.ml import PipelineModel\n",
        " \n",
        "    global trainedModel\n",
        "    global spark\n",
        " \n",
        "    spark = pyspark.sql.SparkSession.builder.appName(\"ADB and AML notebook by Parashar\").getOrCreate()\n",
        "    model_name = \"{model_name}\" #interpolated\n",
        "    # AZUREML_MODEL_DIR is an environment variable created during deployment.\n",
        "    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)\n",
        "    # For multiple models, it points to the folder containing all deployed models (./azureml-models)\n",
        "    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), model_name)\n",
        "    trainedModel = PipelineModel.load(model_path)\n",
        "    \n",
        "def run(input_json):\n",
        "    if isinstance(trainedModel, Exception):\n",
        "        return json.dumps({{\"trainedModel\":str(trainedModel)}})\n",
        "      \n",
        "    try:\n",
        "        sc = spark.sparkContext\n",
        "        input_list = json.loads(input_json)\n",
        "        input_rdd = sc.parallelize(input_list)\n",
        "        input_df = spark.read.json(input_rdd)\n",
        "    \n",
        "        # Compute prediction\n",
        "        prediction = trainedModel.transform(input_df)\n",
        "        #result = prediction.first().prediction\n",
        "        predictions = prediction.collect()\n",
        " \n",
        "        #Get each scored result\n",
        "        preds = [str(x['prediction']) for x in predictions]\n",
        "        result = \",\".join(preds)\n",
        "        # you can return any data type as long as it is JSON-serializable\n",
        "        return result.tolist()\n",
        "    except Exception as e:\n",
        "        result = str(e)\n",
        "        return result\n",
        "    \n",
        "\"\"\".format(model_name=model_name)\n",
        " \n",
        "exec(score_sparkml)\n",
        " \n",
        "with open(\"score_sparkml.py\", \"w\") as file:\n",
        "    file.write(score_sparkml)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core.conda_dependencies import CondaDependencies \n",
        "\n",
        "myacienv = CondaDependencies.create(conda_packages=['scikit-learn','numpy','pandas']) # showing how to add libs as an eg. - not needed for this model.\n",
        "\n",
        "with open(\"myenv.yml\",\"w\") as f:\n",
        "    f.write(myacienv.serialize_to_string())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#deploy to ACI\n",
        "from azureml.core.webservice import AciWebservice, Webservice\n",
        "from azureml.exceptions import WebserviceException\n",
        "from azureml.core.model import InferenceConfig\n",
        "from azureml.core.environment import Environment\n",
        "from azureml.core.conda_dependencies import CondaDependencies\n",
        "\n",
        "\n",
        "myaci_config = AciWebservice.deploy_configuration(cpu_cores = 2, \n",
        "                                                  memory_gb = 2, \n",
        "                                                  tags = {'name':'Databricks Azure ML ACI'}, \n",
        "                                                  description = 'This is for ADB and AML example.')\n",
        "\n",
        "service_name = 'aciws'\n",
        "\n",
        "# Remove any existing service under the same name.\n",
        "try:\n",
        "    Webservice(ws, service_name).delete()\n",
        "except WebserviceException:\n",
        "    pass\n",
        "\n",
        "myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
        "# we need to add extra packages to procured environment\n",
        "# in order to deploy amended environment we need to rename it\n",
        "myenv.name = 'myenv'\n",
        "model_dependencies = CondaDependencies('myenv.yml')\n",
        "for pip_dep in model_dependencies.pip_packages:\n",
        "    myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
        "for conda_dep in model_dependencies.conda_packages:\n",
        "    myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
        "inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
        "\n",
        "myservice = Model.deploy(ws, service_name, [mymodel], inference_config, myaci_config)\n",
        "myservice.wait_for_deployment(show_output=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "help(Webservice)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#for using the Web HTTP API \n",
        "print(myservice.scoring_uri)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import json\n",
        "\n",
        "#get the some sample data\n",
        "test_data_path = \"AdultCensusIncomeTest\"\n",
        "test = spark.read.parquet(test_data_path).limit(5)\n",
        "\n",
        "test_json = json.dumps(test.toJSON().collect())\n",
        "\n",
        "print(test_json)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#using data defined above predict if income is >50K (1) or <=50K (0)\n",
        "myservice.run(input_data=test_json)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#comment to not delete the web service\n",
        "myservice.delete()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Deploying to other types of computes\n",
        "\n",
        "In order to learn how to deploy to other types of compute targets, such as AKS, please take a look at the set of notebooks in the [deployment](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/deployment) folder."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aci-04.png)"
      ]
    }
  ],
  "metadata": {
    "authors": [
      {
        "name": "pasha"
      }
    ],
    "kernelspec": {
      "display_name": "Python 3.6",
      "language": "python",
      "name": "python36"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.8"
    },
    "name": "deploy-to-aci-04",
    "notebookId": 3836944406456376
  },
  "nbformat": 4,
  "nbformat_minor": 1
}